Library Loading for the analysis

library(data.table)
library(tidyverse)
library(rtracklayer)
library(DESeq2)
library(Biostrings)
library(Rsubread)
library(pheatmap)
library(gridExtra)
library(ggrepel)
library(CLIPanalyze)
library(ggplot2)
library(RColorBrewer)
library(mixOmics)
library(plotly)

Loading merged data

Load the rda file from merging all peaks and filtered peaks from HVA/HVAK individual CLIPanalyze call. Counting was re-do using featureCount and DESeq2 were called to do differential analysis.

dir.create("PDF_figure/CLIP_DESeq_09282019_merged", showWarnings = FALSE)

load("../Merged_Analysis/merged_peak_analysis.rda")

plotMA(dds.genes,
       main = "MA plot for all HEAP vs IC\n in genes outside peaks")

pdf("PDF_figure/CLIP_DESeq_09282019_merged/MAPlot_all_HEAPvIC_OutsidePeaks.pdf",
    height = 4,
    width = 5)
plotMA(dds.genes,
       main = "MA plot for all HEAP vs IC\n in genes outside peaks")
dev.off()
## quartz_off_screen 
##                 2
plotMA(dds.peaks,
       main = "MA plot for all HEAP vs IC in peaks,\none-sided test")

pdf("PDF_figure/CLIP_DESeq_09282019_merged/MAPlot_all_HEAPvIC_InPeaks.pdf",
    height = 4,
    width = 5)
plotMA(dds.peaks,
       main = "MA plot for all HEAP vs IC in peaks,\none-sided test")
dev.off()
## quartz_off_screen 
##                 2

Peak filtering

No peak filtering was done here as peak was filtred and selected in their individual analysis.

HVA/HVAK comparison

plotMA(hvak.hva.res,
       main = "MA plot for HVAK over HVA\nin all peaks (significant over input)",
       xlab = "Mean of normalized counts")

pdf("PDF_figure/CLIP_DESeq_09282019_merged/MAPlot_HVAKvHVA_peaks.pdf",
    height = 4,
    width = 5)
plotMA(hvak.hva.res,
       main = "MA plot for HVAK over HVA\nin all peaks (significant over input)",
       xlab = "Mean of normalized counts")
dev.off()
## quartz_off_screen 
##                 2
summary(hvak.hva.res)
## 
## out of 7653 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1760, 23%
## LFC < 0 (down)     : 48, 0.63%
## outliers [1]       : 40, 0.52%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
dds_transform <- varianceStabilizingTransformation(dds.peaks.hva.hvak)
mat.dist = as.data.frame(assay(dds_transform))
mat.dist = as.matrix(dist(t(mat.dist)))
mat.dist = mat.dist/max(mat.dist)
setwd("/Users/mizuhi/OneDrive - Harvard University/Haigis Lab/Projects/Halo-Ago2/Halo-Ago-KRas/Raw Data/HEAP-CLIP/HEAP_09282019/Analysis/CLIP_Analysis/Data Visualization")
png('Hierchical_Clustering_merged_peaks.png')
cim(mat.dist, symkey = FALSE, margins = c(6, 6))
suppressMessages(dev.off())
## quartz_off_screen 
##                 2
pdf("PDF_figure/CLIP_DESeq_09282019_merged/Hierchical_Clustering_merged_peaks.pdf")
cim(mat.dist, symkey = FALSE, margins = c(6, 6))
dev.off()
## quartz_off_screen 
##                 2

Final output is following: Hierchical Clustering

plotPCA(dds_transform, intgroup = "tumor", ntop = 500) +   
  geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-50,130) + ylim(-60,50)

pdf("PDF_figure/CLIP_DESeq_09282019_merged/PCA_merged_peaks.pdf",
    height = 4,
    width = 5)
plotPCA(dds_transform, intgroup = "tumor", ntop = 500) +   
  geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-50,130) + ylim(-60,50)
dev.off()
## quartz_off_screen 
##                 2

I would like to just do PCA on samples from HEAP-CLIP_05212019.

dds_05212019 <- dds_transform[, c(paste0("HVA",1:3), paste0("HVAK",1:3))]
plotPCA(dds_05212019, intgroup = "tumor", ntop = 500) +   
  geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-80,40) + ylim(-40,30)

pdf("PDF_figure/CLIP_DESeq_09282019_merged/PCA_merged_peaks_batch1.pdf",
    height = 4,
    width = 5)
plotPCA(dds_05212019, intgroup = "tumor", ntop = 500) +   
  geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-80,40) + ylim(-40,30)
dev.off()
## quartz_off_screen 
##                 2

I would like to just do PCA on samples from HEAP-CLIP_09282019.

dds_09282019 <- dds_transform[, c(paste0("HVA",4:6), paste0("HVAK",4:6))]
plotPCA(dds_09282019, intgroup = "tumor", ntop = 500) +   
  geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-75,130) + ylim(-40,40)

pdf("PDF_figure/CLIP_DESeq_09282019_merged/PCA_merged_peaks_batch2.pdf",
    height = 4,
    width = 5)
plotPCA(dds_09282019, intgroup = "tumor", ntop = 500) +   
  geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-75,130) + ylim(-40,40)
dev.off()
## quartz_off_screen 
##                 2
filtered.peaks$hvak.hva.padj <- as.numeric(NA)
filtered.peaks$hvak.hva.log2FC <- as.numeric(NA)
filtered.peaks[rownames(hvak.hva.res), ]$hvak.hva.padj <-
    hvak.hva.res$padj
filtered.peaks[rownames(hvak.hva.res), ]$hvak.hva.log2FC <-
    hvak.hva.res$log2FoldChange

Save the datasets

saveRDS(filtered.peaks, "Datafiles/merged-peaks-filtered-09282019.rds")

Map seed to peaks

mirna.info.family <- readRDS("mirna-info-family-seedmatches.rds")  
assignMirToPeaks <- function(miRNA = mirs, 
                             peaks = es.peaks.utr3,
                             database = mirna.info.family){
  require(BSgenome)
  require(CLIPanalyze)
  require(Biostrings)
  bsgenome <- load.bsgenome("mm10")
  peaks.seq <- get.seqs(bsgenome, peaks)
  peaks$seed.8mer <- as.character(NA)
  peaks$seed.7m8 <- as.character(NA)
  peaks$seed.7A1 <- as.character(NA)
  peaks$seed.6mer <- as.character(NA)
  
  for (i in 1:length(miRNA)){
    mir <- miRNA[i]
    #Prepare seed matches
    mir.6m <- as.character(database[database$miR.family %in% mir, ]$seedmatch.6)
    mir.7m8 <- as.character(database[database$miR.family %in% mir, ]$seedmatch.m8)
    mir.7mA <- as.character(database[database$miR.family %in% mir, ]$seedmatch.A1)
    mir.8m <- as.character(database[database$miR.family %in% mir, ]$seedmatch.8)
    #Filter peaks with 6mer matches
    match <- GRanges(vmatchPattern(mir.6m, peaks.seq))
    #Go back to peaks and map the seed match and extend 1 nt on both directions
    match.extend <- peaks[seqnames(match),]
    match.strand <- as.logical(strand(match.extend) == "+")
    
    start(match.extend[match.strand,]) <- start(match.extend[match.strand,]) + start(match[match.strand,]) - 2
    match.extend[match.strand,] <- resize(match.extend[match.strand,], fix = "start", width = 8)
    
    end(match.extend[!match.strand,]) <- end(match.extend[!match.strand,]) - start(match[!match.strand,]) + 2
    match.extend[!match.strand,] <- resize(match.extend[!match.strand,], fix = "start", width = 8)
    #Assign seed types to these matches
    match.extend.seq <- get.seqs(bsgenome, match.extend)
    match.extend.seq.df <- data.frame(Peaks = names(match.extend.seq),
                                      Sequence = as.character(match.extend.seq))
    
    for (j in 1:nrow(match.extend.seq.df)){
      peak.name <- as.character(match.extend.seq.df[j, "Peaks"])
      seq <- as.character(match.extend.seq.df[j, "Sequence"])
      if (grepl(mir.8m, seq)){
        peaks[peak.name, ]$seed.8mer <- ifelse(is.na(peaks[peak.name, ]$seed.8mer),
                                               mir, 
                                               paste(peaks[peak.name, ]$seed.8mer, mir, sep = ", "))
      } else if (grepl(mir.7m8, seq)){
        peaks[peak.name, ]$seed.7m8 <- ifelse(is.na(peaks[peak.name, ]$seed.7m8),
                                              mir, 
                                              paste(peaks[peak.name, ]$seed.7m8, mir, sep = ", "))
      } else if (grepl(mir.7mA, seq)){
        peaks[peak.name, ]$seed.7A1 <- ifelse(is.na(peaks[peak.name, ]$seed.7A1),
                                              mir, 
                                              paste(peaks[peak.name, ]$seed.7A1, mir, sep = ", "))
      } else {
        peaks[peak.name, ]$seed.6mer <- ifelse(is.na(peaks[peak.name, ]$seed.6mer),
                                                mir, 
                                                paste(peaks[peak.name, ]$seed.6mer, mir, sep = ", "))
      }
    }
  }
  
  return(peaks)
}

miRNAs with mean counts larger than 200 are selected and mapped to peaks

mirna.family.DGE <- readRDS("Datafiles/mirna-counts-deseq-by-family-09282019.rds")
mirs <- subset(mirna.family.DGE, baseMean > 200)
mirs <- mirs$miR.family
#peaks.mirs <- assignMirToPeaks(miRNA = mirs,
#                               peaks = filtered.peaks,
#                               database = mirna.info.family)
#saveRDS(peaks.mirs, "Datafiles/merged-peaks-mirs-200-09282019.rds")
peaks.mirs <- readRDS("Datafiles/merged-peaks-mirs-200-09282019.rds")
targetofmiR <- function(peaks.mir = brain.peaks.mirs,
                        miRNA = "",
                        sitetype = "8mer"){
  peaks.mir.sub <- as.data.frame(peaks.mir[,c("hvak.hva.log2FC", "hvak.hva.padj", 
                                              "seed.8mer", "seed.7m8", "seed.7A1", "seed.6mer")])
  peaks.seedmatch <- lapply(c("seed.8mer", "seed.7m8", "seed.7A1", "seed.6mer"),
                            function(seed){
                              map <- peaks.mir.sub[grepl(miRNA, peaks.mir.sub[,seed]),]
                              map <- rownames(map)
                              map
                            })
  names(peaks.seedmatch) <- c("seed.8mer", "seed.7m8", "seed.7A1", "seed.6mer")

  if (sitetype == "8mer"){
    maps <- peaks.seedmatch[[1]]
  } else if (sitetype == "7mer_above"){
    maps <- unique(unlist(peaks.seedmatch[1:3]))
  } else if (sitetype == "7mer"){
    maps <- unique(unlist(peaks.seedmatch[2:3]))
  } else if (sitetype == "6mer"){
    maps <- peaks.seedmatch[[4]]
  } else {
    print("Please input site type as: 8mer, 7mer_above, 7mer or 6mer")
  }
    return(peaks.mir[maps])
                        }
mirs.peaks <- lapply(mirs,
                     function(mir){
                       targetofmiR(miRNA = mir,
                                   peaks.mir = peaks.mirs,
                                   sitetype = "7mer_above")
                     })
names(mirs.peaks) <- mirs
saveRDS(mirs.peaks, "Datafiles/miRNA-merged-peaks-list-09282019.rds")
lens <- as.data.frame(sapply(mirs.peaks, function(x) length(x)))
mirs.peaks.log2FC.median <- sapply(mirs.peaks,
                            function(list){
                              log2FCs <- list$hvak.hva.log2FC
                              return(median(log2FCs[!is.na(log2FCs)]))
                            })
mirs.peaks.log2FC.mean <- sapply(mirs.peaks,
                            function(list){
                              log2FCs <- list$hvak.hva.log2FC
                              return(mean(log2FCs[!is.na(log2FCs)]))
                            })

mirs.targets.log2FC <- cbind(as.data.frame(mirs.peaks.log2FC.median),
                             as.data.frame(mirs.peaks.log2FC.mean),
                             lens)
colnames(mirs.targets.log2FC) <- c("targets_log2FC_median","targets_log2FC_mean", "N")

mirs.targets.log2FC <- merge(mirs.targets.log2FC, mirna.family.DGE[,c("log2FoldChange", "padj")], by = 0)
colnames(mirs.targets.log2FC)[1] <- "miR.family"

Show any miRNA that has more than 20 peaks matched

df <- subset(mirs.targets.log2FC, N > 20)
p <- ggplot(df, aes(x = log2FoldChange, y = targets_log2FC_median, label = miR.family, size = N)) +
  geom_point(colour = "#ECAC46", alpha = 0.6) +
  #scale_color_manual(fill = "yellow") +
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.3) +
  geom_vline(xintercept = 0, linetype = "dashed", size = 0.3) +
  xlab("miRNA expression Fold change log2 (HVAK / HVA)") +
  ylab("Median of peak signal changes log2 (HVAK / HVA)") +
  theme_bw() +
      theme(panel.border = element_blank(),
      panel.background = element_blank(),
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(),
      axis.title.x = element_text(size=12, margin = margin(t = 10)),
      axis.title.y = element_text(size=12, margin = margin(r = 10)),
      axis.text = element_text(size=10),
      axis.line.y = element_line(size = 0.5),
      axis.line.x = element_line(size = 0.5),
      axis.ticks.x = element_line(size = 0),
      axis.ticks.y = element_line(size = 0.5),
      plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
p1 <- ggplotly(p)
p1
pdf("PDF_figure/CLIP_DESeq_09282019_merged/Median_peak_signal_LFC.pdf",
    width = 5,
    height = 4)
p
dev.off()
## quartz_off_screen 
##                 2
p <- ggplot(df, aes(x = log2FoldChange, y = targets_log2FC_mean, label = miR.family, size = N)) +
  geom_point(colour = "#7AEF64", alpha = 0.6) +
  #scale_color_manual(fill = "yellow") +
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.3) +
  geom_vline(xintercept = 0, linetype = "dashed", size = 0.3) +
  xlab("miRNA expression Fold change log2 (HVAK / HVA)") +
  ylab("Mean of peak signal changes log2 (HVAK / HVA)") +
  theme_bw() +
      theme(panel.border = element_blank(),
      panel.background = element_blank(),
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(),
      axis.title.x = element_text(size=12, margin = margin(t = 10)),
      axis.title.y = element_text(size=12, margin = margin(r = 10)),
      axis.text = element_text(size=10),
      axis.line.y = element_line(size = 0.5),
      axis.line.x = element_line(size = 0.5),
      axis.ticks.x = element_line(size = 0),
      axis.ticks.y = element_line(size = 0.5),
      plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
p2 <- ggplotly(p)
p2
pdf("PDF_figure/CLIP_DESeq_09282019_merged/Mean_peak_signal_LFC.pdf",
    width = 5,
    height = 4)
p
dev.off()
## quartz_off_screen 
##                 2

If we only look at the top 40 highly expressed miRNAs

mirna.family.DGE <- mirna.family.DGE[order(-mirna.family.DGE$baseMean),]
mirs.40 <- rownames(mirna.family.DGE)[1:40]

df.40 <- df[df$miR.family %in% mirs.40,]
p <- ggplot(df.40, aes(x = log2FoldChange, y = targets_log2FC_median, label = miR.family, size = N)) +
  geom_point(colour = "#ECAC46", alpha = 0.6) +
  #scale_color_manual(fill = "yellow") +
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.3) +
  geom_vline(xintercept = 0, linetype = "dashed", size = 0.3) +
  xlab("Top 40 miRNA expression Fold change log2 (HVAK / HVA)") +
  ylab("Median of peak signal changes log2 (HVAK / HVA)") +
  theme_bw() +
      theme(panel.border = element_blank(),
      panel.background = element_blank(),
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(),
      axis.title.x = element_text(size=12, margin = margin(t = 10)),
      axis.title.y = element_text(size=12, margin = margin(r = 10)),
      axis.text = element_text(size=10),
      axis.line.y = element_line(size = 0.5),
      axis.line.x = element_line(size = 0.5),
      axis.ticks.x = element_line(size = 0),
      axis.ticks.y = element_line(size = 0.5),
      plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
p3 <- ggplotly(p)
p3
pdf("PDF_figure/CLIP_DESeq_09282019_merged/Median_peak_signal_LFC_top40_miRNA.pdf",
    width = 5,
    height = 4)
p
dev.off()
## quartz_off_screen 
##                 2
p <- ggplot(df.40, aes(x = log2FoldChange, y = targets_log2FC_mean, label = miR.family, size = N)) +
  geom_point(colour = "#7AEF64", alpha = 0.6) +
  #scale_color_manual(fill = "yellow") +
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.3) +
  geom_vline(xintercept = 0, linetype = "dashed", size = 0.3) +
  xlab("Top 40 miRNA expression Fold change log2 (HVAK / HVA)") +
  ylab("Mean of peak signal changes log2 (HVAK / HVA)") +
  theme_bw() +
      theme(panel.border = element_blank(),
      panel.background = element_blank(),
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(),
      axis.title.x = element_text(size=12, margin = margin(t = 10)),
      axis.title.y = element_text(size=12, margin = margin(r = 10)),
      axis.text = element_text(size=10),
      axis.line.y = element_line(size = 0.5),
      axis.line.x = element_line(size = 0.5),
      axis.ticks.x = element_line(size = 0),
      axis.ticks.y = element_line(size = 0.5),
      plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
p4 <- ggplotly(p)
p4
pdf("PDF_figure/CLIP_DESeq_09282019_merged/Mean_peak_signal_LFC_top40_miRNA.pdf",
    width = 5,
    height = 4)
p
dev.off()
## quartz_off_screen 
##                 2

Here are the top 10 miRNAs with the most peaks associated.

color.vec <- brewer.pal(name = "Spectral", n = 11)
df <- as.data.frame(filtered.peaks)
df$hvak.hva.logP <- -log10(as.numeric(df$hvak.hva.padj))
len <- sapply(mirs.peaks, function(x) length(x))
mirs.peaks <- mirs.peaks[order(-len)]
mirna <- names(mirs.peaks)
mirs.peaks.names <- lapply(mirs.peaks, function(x) names(x))

for (i in 1:10){
  p <- ggplot() + 
    geom_point(data = df, aes(x = hvak.hva.log2FC, y = hvak.hva.logP), size = 1, alpha = 0.5, color = "grey80") +
    geom_point(data = df[mirs.peaks.names[[mirna[i]]],], aes(x = hvak.hva.log2FC, y = hvak.hva.logP), size = 1, alpha = 0.7, color = "#7570B3") +
    geom_hline(yintercept = -log10(0.05), linetype = "dashed", size = 0.3) +
    geom_vline(xintercept = c(-0.5, 0.5), linetype = "dashed", size = 0.3) +
    xlab("Fold change log2 (HVAK / HVA)") +
    ylab("-log10(FDR)") +
    theme_bw() +
    theme(panel.border = element_blank(),
      panel.background = element_blank(),
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(),
      axis.title.x = element_text(size=12, margin = margin(t = 10)),
      axis.title.y = element_text(size=12, margin = margin(r = 10)),
      axis.text = element_text(size=10),
      axis.line.y = element_line(size = 0.5),
      axis.line.x = element_line(size = 0.5),
      axis.ticks.x = element_line(size = 0),
      axis.ticks.y = element_line(size = 0.5),
      plot.title = element_text(hjust = 0.5, size = 12, face = "bold")) +
    #guides(fill = guide_legend(title = "miRNA family")) +
    ggtitle(sprintf("%s targets", mirna[i ]))
  print(p)
}
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).

## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).

## Warning: Removed 117 rows containing missing values (geom_point).

## Warning: Removed 2 rows containing missing values (geom_point).

## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).

## Warning: Removed 117 rows containing missing values (geom_point).

## Warning: Removed 6 rows containing missing values (geom_point).

## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).

## Warning: Removed 117 rows containing missing values (geom_point).

## Warning: Removed 2 rows containing missing values (geom_point).

## Warning: Removed 117 rows containing missing values (geom_point).

## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).

## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).

pdf("PDF_figure/CLIP_DESeq_09282019_merged/VolcanoPlot_Peak_by_miRNAFamily.pdf",
    width = 5,
    height = 4)
for (i in 1:10){
  p <- ggplot() + 
    geom_point(data = df, aes(x = hvak.hva.log2FC, y = hvak.hva.logP), size = 1, alpha = 0.5, color = "grey80") +
    geom_point(data = df[mirs.peaks.names[[mirna[i]]],], aes(x = hvak.hva.log2FC, y = hvak.hva.logP), size = 1, alpha = 0.7, color = "#7570B3") +
    geom_hline(yintercept = -log10(0.05), linetype = "dashed", size = 0.3) +
    geom_vline(xintercept = c(-0.5, 0.5), linetype = "dashed", size = 0.3) +
    xlab("Fold change log2 (HVAK / HVA)") +
    ylab("-log10(FDR)") +
    theme_bw() +
    theme(panel.border = element_blank(),
      panel.background = element_blank(),
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(),
      axis.title.x = element_text(size=12, margin = margin(t = 10)),
      axis.title.y = element_text(size=12, margin = margin(r = 10)),
      axis.text = element_text(size=10),
      axis.line.y = element_line(size = 0.5),
      axis.line.x = element_line(size = 0.5),
      axis.ticks.x = element_line(size = 0),
      axis.ticks.y = element_line(size = 0.5),
      plot.title = element_text(hjust = 0.5, size = 12, face = "bold")) +
    #guides(fill = guide_legend(title = "miRNA family")) +
    ggtitle(sprintf("%s targets", mirna[i ]))
  print(p)
}
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).

## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen 
##                 2
# Plot all peak density
ggplot(df, aes(x = hvak.hva.log2FC, y = hvak.hva.logP)) +
  geom_hex(bins = 40, color = "white", aes(fill = stat(log2(count)))) +
  scale_fill_viridis_c() +
  geom_vline(xintercept = 0, linetype = "dashed", size = 0.3) +
    xlab("Fold change log2 (HVAK / HVA)") +
    ylab("-log10(FDR)") +
    theme_bw() +
    theme(panel.border = element_blank(),
      panel.background = element_blank(),
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(),
      axis.title.x = element_text(size=12, margin = margin(t = 10)),
      axis.title.y = element_text(size=12, margin = margin(r = 10)),
      axis.text = element_text(size=10),
      axis.line.y = element_line(size = 0.5),
      axis.line.x = element_line(size = 0.5),
      axis.ticks.x = element_line(size = 0),
      axis.ticks.y = element_line(size = 0.5),
      plot.title = element_text(hjust = 0.5, size = 12, face = "bold")) +
    #guides(fill = guide_legend(title = "miRNA family")) +
    ggtitle("All peaks")
## Warning: Removed 117 rows containing non-finite values (stat_binhex).

# Number of peaks with LFC on either side
# LFC > 0
LFC <- df$hvak.hva.log2FC[!is.na(df$hvak.hva.log2FC)]
sum(LFC > 0)
## [1] 7074
sum(LFC > 0)/length(LFC) * 100
## [1] 92.43434
# LFC < 0
sum(LFC < 0)
## [1] 579
sum(LFC < 0)/length(LFC) * 100
## [1] 7.565661
pdf("PDF_figure/CLIP_DESeq_09282019_merged/VolcanoPlot_all_peaks.pdf",
    width = 5,
    height = 4)
# Plot density of peak points on teh volcano plot
ggplot(df, aes(x = hvak.hva.log2FC, y = hvak.hva.logP)) +
  geom_hex(bins = 40, color = "white", aes(fill = stat(log2(count)))) +
  scale_fill_viridis_c() +
  geom_vline(xintercept = 0, linetype = "dashed", size = 0.3) +
    xlab("Fold change log2 (HVAK / HVA)") +
    ylab("-log10(FDR)") +
    theme_bw() +
    theme(panel.border = element_blank(),
      panel.background = element_blank(),
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(),
      axis.title.x = element_text(size=12, margin = margin(t = 10)),
      axis.title.y = element_text(size=12, margin = margin(r = 10)),
      axis.text = element_text(size=10),
      axis.line.y = element_line(size = 0.5),
      axis.line.x = element_line(size = 0.5),
      axis.ticks.x = element_line(size = 0),
      axis.ticks.y = element_line(size = 0.5),
      plot.title = element_text(hjust = 0.5, size = 12, face = "bold")) +
    #guides(fill = guide_legend(title = "miRNA family")) +
    ggtitle("All peaks")
## Warning: Removed 117 rows containing non-finite values (stat_binhex).
dev.off()
## quartz_off_screen 
##                 2

I just want a table of all top 40 miRNAs and the number of peaks that are associated with them with LFC (HVAK/HVA) > 0 and < 0.

peak.number <- as.data.frame(table(mirs.peaks[[1]]$hvak.hva.log2FC > 0))
peak.number <- t(peak.number[,-1])
colnames(peak.number) <- c("LFC(HVAK/HVA) < 0", "LFC(HVAK/HVA) > 0")
for (i in 2: 40) {
  new.peak <- as.data.frame(table(mirs.peaks[[i]]$hvak.hva.log2FC > 0))
  new.peak <- t(new.peak[,-1])
  peak.number <- rbind(peak.number, new.peak)
}

rownames(peak.number) <- names(mirs.peaks)[1:40]
peak.number <- rbind(peak.number, c(sum(df$hvak.hva.log2FC > 0), sum(df$hvak.hva.log2FC < 0)))
rownames(peak.number)[dim(peak.number)[1]] <- "Total"
peak.number
##                                      LFC(HVAK/HVA) < 0 LFC(HVAK/HVA) > 0
## let-7-5p/miR-98-5p                                  56               738
## miR-15-5p/16-5p/195-5p/322-5p/497-5p                10               468
## miR-29-3p                                           14               454
## miR-200-3p/429-3p                                   43               390
## miR-17-5p/20-5p/93-5p/106-5p                        33               403
## miR-24-3p                                           16               371
## miR-194-5p                                          15               342
## miR-27-3p                                            7               292
## miR-196-5p                                          14               251
## miR-103-3p/107-3p                                    5               254
## miR-23-3p                                           10               239
## miR-26-5p                                           15               228
## miR-19-3p                                           13               218
## miR-25-3p/32-5p/92-3p/363-3p/367-3p                 12               197
## miR-30-5p/384-5p                                    15               168
## miR-141-3p                                          12               170
## miR-484                                              5               174
## miR-130-3p/301-3p                                   14               153
## miR-22-3p                                            6               146
## miR-96-5p                                           15               137
## miR-181-5p                                          13               133
## miR-182-5p                                          17               124
## miR-148-3p/152-3p                                   16               123
## miR-7-5p                                             1               128
## miR-205-5p                                          11               117
## miR-34-5p/449-5p                                     9               105
## miR-132-3p/212-3p                                    5                99
## miR-30a-3p/30d-3p/30e-3p                             4                97
## miR-31-5p                                           37                51
## miR-194-2-3p/6926-5p/7055-5p                         6                81
## miR-423-5p                                           2                83
## miR-141-5p/6769b-3p                                 10                75
## miR-21ac-5p                                          7                75
## miR-192-5p/215-5p                                    9                72
## miR-139-5p                                           3                76
## miR-193-3p                                           1                75
## miR-185-5p                                           5                71
## miR-671-5p                                           6                65
## miR-135ab-5p                                         2                63
## miR-18-3p/7069-3p                                    3                61
## Total                                               NA                NA

SessionInfo

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] plotly_4.10.0               mixOmics_6.16.3            
##  [3] lattice_0.20-45             MASS_7.3-54                
##  [5] RColorBrewer_1.1-2          CLIPanalyze_0.0.10         
##  [7] GenomicAlignments_1.28.0    Rsamtools_2.8.0            
##  [9] GenomicFeatures_1.44.2      AnnotationDbi_1.54.1       
## [11] plyr_1.8.6                  ggrepel_0.9.1              
## [13] gridExtra_2.3               pheatmap_1.0.12            
## [15] Rsubread_2.6.4              Biostrings_2.60.2          
## [17] XVector_0.32.0              DESeq2_1.32.0              
## [19] SummarizedExperiment_1.22.0 Biobase_2.52.0             
## [21] MatrixGenerics_1.4.3        matrixStats_0.61.0         
## [23] rtracklayer_1.52.1          GenomicRanges_1.44.0       
## [25] GenomeInfoDb_1.28.4         IRanges_2.26.0             
## [27] S4Vectors_0.30.2            BiocGenerics_0.38.0        
## [29] forcats_0.5.1               stringr_1.4.0              
## [31] dplyr_1.0.7                 purrr_0.3.4                
## [33] readr_2.1.0                 tidyr_1.1.4                
## [35] tibble_3.1.6                ggplot2_3.3.5              
## [37] tidyverse_1.3.1             data.table_1.14.2          
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1           backports_1.4.0        BiocFileCache_2.0.0   
##   [4] igraph_1.2.9           lazyeval_0.2.2         splines_4.1.1         
##   [7] crosstalk_1.2.0        BiocParallel_1.26.2    digest_0.6.28         
##  [10] htmltools_0.5.2        fansi_0.5.0            magrittr_2.0.1        
##  [13] memoise_2.0.1          tzdb_0.2.0             annotate_1.70.0       
##  [16] modelr_0.1.8           rARPACK_0.11-0         prettyunits_1.1.1     
##  [19] colorspace_2.0-2       blob_1.2.2             rvest_1.0.2           
##  [22] rappdirs_0.3.3         haven_2.4.3            xfun_0.28             
##  [25] hexbin_1.28.2          crayon_1.4.2           RCurl_1.98-1.5        
##  [28] jsonlite_1.7.2         genefilter_1.74.1      survival_3.2-13       
##  [31] glue_1.5.0             gtable_0.3.0           zlibbioc_1.38.0       
##  [34] DelayedArray_0.18.0    scales_1.1.1           DBI_1.1.1             
##  [37] Rcpp_1.0.7             viridisLite_0.4.0      xtable_1.8-4          
##  [40] progress_1.2.2         bit_4.0.4              biosignals_0.1.0      
##  [43] htmlwidgets_1.5.4      httr_1.4.2             ellipsis_0.3.2        
##  [46] farver_2.1.0           pkgconfig_2.0.3        XML_3.99-0.8          
##  [49] dbplyr_2.1.1           locfit_1.5-9.4         utf8_1.2.2            
##  [52] tidyselect_1.1.1       labeling_0.4.2         rlang_0.4.12          
##  [55] reshape2_1.4.4         munsell_0.5.0          cellranger_1.1.0      
##  [58] tools_4.1.1            cachem_1.0.6           cli_3.1.0             
##  [61] generics_0.1.1         RSQLite_2.2.8          broom_0.7.10          
##  [64] evaluate_0.14          fastmap_1.1.0          yaml_2.2.1            
##  [67] knitr_1.36             bit64_4.0.5            fs_1.5.0              
##  [70] KEGGREST_1.32.0        xml2_1.3.2             biomaRt_2.48.3        
##  [73] compiler_4.1.1         rstudioapi_0.13        filelock_1.0.2        
##  [76] curl_4.3.2             png_0.1-7              reprex_2.0.1          
##  [79] geneplotter_1.70.0     stringi_1.7.5          highr_0.9             
##  [82] RSpectra_0.16-0        Matrix_1.3-4           vctrs_0.3.8           
##  [85] pillar_1.6.4           lifecycle_1.0.1        jquerylib_0.1.4       
##  [88] bitops_1.0-7           corpcor_1.6.10         R6_2.5.1              
##  [91] BiocIO_1.2.0           assertthat_0.2.1       rjson_0.2.20          
##  [94] withr_2.4.2            GenomeInfoDbData_1.2.6 hms_1.1.1             
##  [97] grid_4.1.1             rmarkdown_2.11         lubridate_1.8.0       
## [100] ellipse_0.4.2          restfulr_0.0.13